/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *      display some contents of the buffer cache
 *
 *      contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"


/*
 * Accepted range for the number of result columns.
 *
 * pg_buffercache_pages() rejects a result row type with fewer than
 * NUM_BUFFERCACHE_PAGES_MIN_ELEM or more than NUM_BUFFERCACHE_PAGES_ELEM
 * columns.  The ninth column, pinning_backends, is only emitted when the
 * caller's row type has exactly NUM_BUFFERCACHE_PAGES_ELEM columns; the
 * shorter form is what the version 1.0 function definition expects.
 */
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM    8
#define NUM_BUFFERCACHE_PAGES_ELEM    9

PG_MODULE_MAGIC;

/*
 * Record structure holding the to-be-exposed cache data.
 *
 * One record is filled in per shared buffer while that buffer's header lock
 * is held, and is later turned into one result row.
 */
typedef struct
{
    uint32        bufferid;        /* result of BufferDescriptorGetBuffer() */
    Oid            relfilenode;    /* buffer tag: rnode.relNode */
    Oid            reltablespace;    /* buffer tag: rnode.spcNode */
    Oid            reldatabase;    /* buffer tag: rnode.dbNode */
    ForkNumber    forknum;        /* buffer tag: forkNum */
    BlockNumber blocknum;        /* buffer tag: blockNum */
    bool        isvalid;        /* BM_VALID and BM_TAG_VALID were both set */
    bool        isdirty;        /* BM_DIRTY was set */
    uint16        usagecount;        /* BUF_STATE_GET_USAGECOUNT() of the state */

    /*
     * Taken from BUF_STATE_GET_REFCOUNT() of the buffer state.
     *
     * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
     * being pinned by too many backends and each backend will only pin once
     * because of bufmgr.c's PrivateRefCount infrastructure.
     */
    int32        pinning_backends;
} BufferCachePagesRec;


/*
 * Function context for data persisting over repeated calls.
 *
 * Allocated in the SRF's multi_call_memory_ctx on the first call and stored
 * in funcctx->user_fctx so that subsequent calls can retrieve it.
 */
typedef struct
{
    /* Blessed descriptor used to form every result tuple. */
    TupleDesc    tupdesc;
    /* Array of NBuffers records, one per shared buffer, filled on first call. */
    BufferCachePagesRec *record;
} BufferCachePagesContext;


/*
 * pg_buffercache_pages
 *      Set-returning function exposing one row per shared buffer.
 *
 * Each row carries the buffer number, the relation's filenode, tablespace
 * and database, the fork number, the block number, the dirty flag, the usage
 * count and - when the caller's result type has a ninth column - the number
 * of backends pinning the buffer.  Every column except bufferid is NULL for
 * a buffer that is unused or not valid.
 *
 * On the first call all NBuffers buffer headers are scanned and copied into
 * memory that persists across calls; each later call only formats one saved
 * record, so the rows reflect the state observed during the first call.
 *
 * Raises an ERROR if the call's result type is not a row type, or if it does
 * not have between NUM_BUFFERCACHE_PAGES_MIN_ELEM and
 * NUM_BUFFERCACHE_PAGES_ELEM columns.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);

Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum        result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;    /* User function context. */
    TupleDesc    tupledesc;
    TupleDesc    expected_tupledesc;
    HeapTuple    tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int            i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension
         * transparently handle the (non-)existence of the pinning_backends
         * column. We unfortunately have to get the result type for that... -
         * we can't use the result type determined by the function definition
         * without potentially crashing when somebody uses the old (or even
         * wrong) function definition though.
         */
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /*
         * Construct a tuple descriptor for the result rows.  Only the column
         * count is taken from the caller's row type; the column types are
         * fixed here, and the Datum constructors used below must match them.
         */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts, false);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr = GetBufferDescriptor(i);
            BufferCachePagesRec *rec = &fctx->record[i];
            uint32        buf_state;

            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            rec->bufferid = BufferDescriptorGetBuffer(bufHdr);
            rec->relfilenode = bufHdr->tag.rnode.relNode;
            rec->reltablespace = bufHdr->tag.rnode.spcNode;
            rec->reldatabase = bufHdr->tag.rnode.dbNode;
            rec->forknum = bufHdr->tag.forkNum;
            rec->blocknum = bufHdr->tag.blockNum;
            rec->usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            rec->pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
            rec->isdirty = (buf_state & BM_DIRTY) != 0;

            /* Note if the buffer is valid, and has storage created */
            rec->isvalid = (buf_state & BM_VALID) != 0 &&
                (buf_state & BM_TAG_VALID) != 0;

            UnlockBufHdr(bufHdr, buf_state);
        }
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32        i = funcctx->call_cntr;
        BufferCachePagesRec *rec = &fctx->record[i];
        Datum        values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        /* bufferid is reported for every buffer, valid or not. */
        values[0] = Int32GetDatum(rec->bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (rec->blocknum == InvalidBlockNumber || !rec->isvalid)
        {
            int            col;

            /*
             * The last slot is unused for v1.0 callers, but the array is
             * always long enough.
             */
            for (col = 1; col < NUM_BUFFERCACHE_PAGES_ELEM; col++)
                nulls[col] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(rec->relfilenode);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(rec->reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(rec->reldatabase);
            nulls[3] = false;
            /* relforknumber is declared INT2OID above, so build an int2. */
            values[4] = Int16GetDatum(rec->forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) rec->blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(rec->isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(rec->usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(rec->pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}
